//Purpose: Use affiliate data to create a countryXfirm dataset and a firm level affiliate dataset 

//Set directories
cd ""  /* PROJECT ROOT FOLDER */
global data "data/"
global output output 


set more off



*******************************************************
*Create datasets for foreign affiliates of U.S. MNLs
*******************************************************


set more off

cap program drop clean_aff
program define clean_aff
  syntax , year(integer)

  // load affiliate level data 
  use if missing(firmid)==0 using "$data/mnl_foreign_affiliates`year'.dta", clear
	rename *, lower

  // Identify affiliate ownership type
  gen majorityowned = (aff_type_ownership=="Majority")
  sum majorityowned



  bys majorityowned: summ test*  // Missing details for non-majority-owned

  keep if majorityowned
  drop majorityowned



  rename us_id us_id_out
  rename bea_name bea_name_out

  tostring aff_naics, gen(ff_naics4)
  gen manuf=1 if substr(ff_naics4,1,1)=="3"

  gen sector="manuf" if manuf==1
  replace sector="other" if manuf~=.

  tostring aff_ind_parent, replace
  tab aff_ind_parent
  gen bea_manuf = 1 if substr(aff_ind_parent,1,1)=="3"
  replace bea_manuf = 0 if substr(aff_ind_parent,1,1)~="3"
  tab bea_manuf
  label var bea_manuf "us parent is manufacturing, acc.  to BEA code"


  *assess variation in manuf within mnes across affiliates
  bys firmid: egen tot_manuf=sum(manuf)
  gen plant=1
  bys firmid: egen tot_plants=sum(plant)
  gen manuf_plant_share=tot_manuf/tot_plants
  summ manuf_plant_share, d   




  label variable aff_im "imports to the U.S. by the forein affiliate"
  label variable aff_ex "exports from the U.S. to the foreign affiliate"
  label variable aff_goods "sales of goods"
  label variable aff_svc "sales of services"
  label variable aff_sales_ins "were any sales generated by insurance related activities"

  label define yesno 1 "yes" 2 "no"
  label values aff_sales_ins yesno
  
  sum test*, d


  **sample restriction
  **only keep affiliates with positive sales or emp 
  keep if (aff_emp>0 | aff_sales>0) 


  *Distinguish between manuf and non-manuf affiliate activity
  foreach var in aff_sales aff_sales_oth_fa aff_sales_local_unaff aff_sales_local_fa ///
		aff_sales_oth_unaff aff_sales_us_unaff aff_sales_us_rep aff_sales_ins aff_emp {
		gen man_`var'=`var' if manuf==1
		}
		
  gen man_aff = 1 if (man_aff_sales>0 & man_aff_sales~=.) | (man_aff_emp>0 & man_aff_emp~=.)


  gen bank_aff = inlist(fa_entity_doc_type,6,8,21)
  gen bank_usentity = inlist(us_entity_doc_type,6,8,21)


  *create firm level dataset 
  preserve 
    gen number_of_aff=1
    *bys firmid aff_ctry: gen number_of_countries = 1 if _n==1   /* don't do this since not limited to manuf aff */
    collapse (sum) aff_sales* aff_emp number_of_aff man_* vertical (mean) aff_emp_parent bank_aff bea_manuf (max)  bank_usentity , by(firmid)
    gen sh_aff_vertical = vertical / number_of_aff
    drop vertical 
    
    sum bea_manuf, d

    *correct for sales to other foreign affiliates 
    * replace aff_sales = aff_sales - aff_sales_local_fa - aff_sales_oth_fa  /*  Do this elsewhere...  */

    label var number_of_aff "number foreign affiliates"
    label variable man_aff "number of manuf foreign affiliates"
  * label var number_of_countries "number countries with a foreign affiliate"
    label var sh_aff_vertical "share of firms affiliates that sell 80 pct to parent firm"
    label var bank_aff "share of affiliates that are banks"
    label var bank_usentity "us parent is a bank" 
    label var bea_manuf "us parent is manufacturing, acc.  to BEA code"
    save $data/mnl_foreign_affiliates_firms`year'.dta, replace 
  restore



  *create firm-country level dataset
  gen number_of_aff=1
  collapse (sum) aff_sales* aff_emp man_* number_of_aff vertical (mean) bank_aff (max)  bank_usentity, by(firmid aff_ctry )
  gen sh_aff_vertical = vertical / number_of_aff
  drop vertical 
  rename aff_ctry ctry

  *at country level we don't want to count sales to other affiliates in the same country as sales
  *replace aff_sales = aff_sales - aff_sales_local_fa


  label var number_of_aff "number foreign affiliates in country"
  label var sh_aff_vertical "share of firmXcountry affiliates that sell 80 pct to parent firm"
  label variable man_aff "number of manuf foreign affiliates"
  label var bank_aff "share of affiliates that are banks"
  label var bank_usentity "us reporter is a bank"
  save "$data/aff_firm_country_`year'.dta", replace 

end //end program


clean_aff, year(2007)

/*
//
forvalues yr=2007/2012 {
  clean_aff, year(`yr')
}
*/











